from flask import Flask,redirect,render_template,url_for
import pandas as pd
import re
import collections
import json

# Load the scraped records (presumably job postings - confirm against the
# CSV). This section reads the 'title' column; later code reads 'need'.
data = pd.read_csv('boss1.csv')

# Ordered (regex, label) rules that map a raw title to a normalised category.
# They are applied one after another to the evolving 'post' column. No label
# contains a keyword of a different rule, so a title that matches several
# rules keeps the label of the first matching rule.
_POST_RULES = [
    (r'开发|研发', '大数据开发工程师'),
    (r'架构', '大数据架构工程师'),
    (r'挖掘', '数据挖掘工程师'),
    (r'运维|维护', '数据运维工程师'),
    (r'分析', '数据分析师'),
    (r'算法', '算法工程师'),
    (r'可视化', '大数据可视化工程师'),
]

data['post'] = data['title']
for _pattern, _label in _POST_RULES:
    # A single .loc assignment writes into `data` itself. The chained form
    # data['post'][mask] = ... may write to a temporary copy and is a no-op
    # under pandas Copy-on-Write. na=False treats missing titles as
    # "no match" instead of producing an unusable NaN mask.
    data.loc[data['post'].str.contains(_pattern, na=False), 'post'] = _label

# Everything that matched no rule becomes the catch-all category. Only the
# 'post' column is relabelled; assigning to data[mask] would overwrite every
# column of those rows (including 'title' and 'need') with '其他'.
data.loc[
    ~data['post'].str.contains('开发|架构|挖掘|运维|分析|算法|可视化', na=False),
    'post',
] = '其他'

# Collect the 'need' text of every row, grouped by its 'post' category.
# Rows whose category is not one of the eight known labels are ignored.
_need_by_post = {
    '大数据开发工程师': [],
    '大数据架构工程师': [],
    '数据挖掘工程师': [],
    '数据运维工程师': [],
    '数据分析师': [],
    '算法工程师': [],
    '大数据可视化工程师': [],
    '其他': [],
}
# Iterate both columns in lockstep instead of looking 'need' up by a running
# counter, which only works while the index labels happen to be 0..n-1.
for _post, _need in zip(data['post'], data['need']):
    _bucket = _need_by_post.get(_post)
    if _bucket is not None:
        _bucket.append(str(_need))

# One text blob per category. The pieces are joined with a newline so that a
# trailing English word of one row cannot fuse with the leading word of the
# next row, which would corrupt word counts taken over the blob.
a = "\n".join(_need_by_post['大数据开发工程师'])
b = "\n".join(_need_by_post['大数据架构工程师'])
c = "\n".join(_need_by_post['数据挖掘工程师'])
d = "\n".join(_need_by_post['数据运维工程师'])
e = "\n".join(_need_by_post['数据分析师'])
f = "\n".join(_need_by_post['算法工程师'])
g = "\n".join(_need_by_post['大数据可视化工程师'])
h = "\n".join(_need_by_post['其他'])


def getre(data):
    """Return the five most frequent English words found in *data*.

    Every maximal run of ASCII letters counts as one word. Words are
    normalised with ``str.capitalize`` before counting, so differently cased
    spellings of the same word are tallied together.

    Args:
        data: The text to scan.

    Returns:
        A list of at most five ``(word, count)`` tuples, most frequent first.
    """
    tally = collections.Counter(
        token.capitalize() for token in re.findall('[a-zA-Z]+', data)
    )
    return tally.most_common(5)


# Category labels, in the same order as the per-category statistics
# (a..g / keya..keyg) computed in this file. The catch-all '其他' is not listed.
name = [
    '大数据开发工程师',
    '大数据架构工程师',
    '数据挖掘工程师',
    '数据运维工程师',
    '数据分析师',
    '算法工程师',
    '大数据可视化工程师',
]

def getaa(aaa):
    """Split the top-word statistics of *aaa* into two parallel lists.

    *aaa* is converted with ``str`` and handed to ``getre``; the resulting
    ``(word, count)`` pairs are separated into a list of words and a list of
    counts that share the same order.

    Args:
        aaa: The value whose text form is analysed.

    Returns:
        A ``(words, keys)`` tuple of two lists: the words and their counts.
    """
    top_pairs = getre(str(aaa))
    words = [word for word, _ in top_pairs]
    keys = [count for _, count in top_pairs]
    return words, keys

# Top words (word*) and their counts (key*) for each of the seven named
# categories; the catch-all text in `h` is not analysed.
worda, keya = getaa(a)
wordb, keyb = getaa(b)
wordc, keyc = getaa(c)
wordd, keyd = getaa(d)
worde, keye = getaa(e)
wordf, keyf = getaa(f)
wordg, keyg = getaa(g)

# Per-category total of the top-word counts, in the same order as `name`.
name_num = [
    sum(keya),
    sum(keyb),
    sum(keyc),
    sum(keyd),
    sum(keye),
    sum(keyf),
    sum(keyg),
]

jsonData = {
    'name': name,
    'name_num': name_num,
    'keya': keya,
    'keyb': keyb,
    'keyc': keyc,
    'keyd': keyd,
    'keye': keye,
    'keyf': keyf,
    'keyg': keyg,
}

# json.dumps is the serialiser; the previous json.jumps does not exist and
# raised AttributeError as soon as the module was loaded.
j = json.dumps(jsonData)